
* Load the raw 1990 census extract and set up the household and province identifiers.
use ../raw/census1990/census1990.dta, clear

* From here on the raw variable serial is called hhcode.
rename serial hhcode

tabulate cn90a_prov, missing

* Count the distinct provinces by flagging the first observation of each one.
* Note: bysort leaves the data sorted by prov.
clonevar prov = cn90a_prov
bysort prov: generate byte first_in_prov = (_n == 1)
count if first_in_prov // 30 provinces
drop first_in_prov

* Move the two identifiers to the front of the variable list.
order hhcode prov

** Household-level information from the raw data
* preserve/restore leaves the person-level data in memory untouched while a
* one-record-per-household file is written to ../temp.
preserve

	* keep one observation for each household
	bysort hhcode: keep if _n == 1

	* collective or domestic household: the flag is 1 when cn90a_gq equals 1, else 0
	tabulate cn90a_gq, missing nolabel
	generate hhdomestic = (cn90a_gq == 1)

	keep hhcode prov hhdomestic
	save ../temp/census1990_hhraw, replace

restore


*** Individual-level information

* Relation to the household head: copy the raw code and list its value labels
clonevar hstatus = cn90a_relate
label list cn90a_relate_lbl
tabulate hstatus, nolabel

* Age and date of birth
tabulate cn90a_birthy, missing    // birth year
tabulate cn90a_birthm, missing    // birth month
tabulate cn90a_age, nolabel

* birthy is the raw year plus 1000
* (presumably the raw year is stored with three digits -- confirm against the codebook)
generate birthy = 1000 + cn90a_birthy
generate birthm = cn90a_birthm
clonevar age = cn90a_age

* Gender: male is 1 when cn90a_sex equals 1, 0 for any other value,
* and missing when cn90a_sex is the system missing value "."
tabulate cn90a_sex, missing nolabel
generate male = (cn90a_sex == 1) if cn90a_sex != .

	* family status: father, mother or children
* fstatus recodes the relationship code hstatus:
*   1, 2  hstatus is 1 or 2, split by sex (male == 1 -> 1 "father", male == 0 -> 2 "mother")
*   3-8   same code as hstatus
*   9     hstatus is missing
* Observations with hstatus 1 or 2 but missing sex, and any non-missing hstatus
* outside 1-8, are left with fstatus missing.
generate fstatus = 1 if inlist(hstatus, 1, 2) & male == 1
replace fstatus = 2 if inlist(hstatus, 1, 2) & male == 0
replace fstatus = hstatus if inlist(hstatus, 3, 4, 5, 6, 7, 8)
replace fstatus = 9 if missing(fstatus) & missing(hstatus)
tabulate fstatus, missing nolabel

* Code 9 gets a label as well, so that it is not shown as a bare number in tables.
label define fstatus_lbl 1 "father" 2 "mother" 3 "child" 4 "grandchild" 5 "grandparent" 6 "grandgrandparent" 7 "other relatives" 8 "other non-relatives" 9 "relation missing"
label values fstatus fstatus_lbl

tabulate fstatus, missing
	

* Ethnicity
tabulate cn90a_nation, missing nolabel

* han is 1 for code 1, 0 for the other codes up to 56, and missing above 56
* (foreigner, unidentified, or other errors). Stata missing values compare as
* larger than any number, so a missing raw code also yields a missing han.
generate han = (cn90a_nation == 1) if cn90a_nation <= 56

* keep the full ethnicity code as well
clonevar ethn = cn90a_nation

* hukou, 1:urban, 0:rural
tabulate cn90a_hhtyap, missing nolabel

* Raw code 2 -> 1, raw code 0 -> missing, every other non-missing code -> 0.
* A missing raw code is propagated as missing instead of being counted as 0 (rural),
* which is how han and localhukou already treat missing inputs.
generate hukou = (cn90a_hhtyap == 2) if cn90a_hhtyap != 0 & !missing(cn90a_hhtyap)

* Migrant status
tabulate cn90a_regist, missing
clonevar regstatus = cn90a_regist

* localhukou is 1 when regstatus equals 1, 0 for other codes,
* and missing when regstatus is missing
generate localhukou = (regstatus == 1) if !missing(regstatus)

* Usual residence in 1985 and related items: plain copies of the raw codes
tabulate cn90a_res85, missing nolabel
tabulate cn90a_res85t
tabulate cn90a_migreas

clonevar reside85     = cn90a_res85
clonevar reside85type = cn90a_res85t
clonevar migreason    = cn90a_migreas

* Education
tabulate cn90a_edlev1, missing nolabel

* edu keeps the raw attainment code
clonevar edu = cn90a_edlev1

* eduy converts the attainment code into years of schooling:
*   codes below 2 -> 0, then codes 2..7 -> 6, 9, 12, 13, 15, 16.
* Missing codes (and any code above 7) leave eduy missing.
generate eduy = .
replace eduy = 0 if cn90a_edlev1 < 2
local level = 2
foreach years in 6 9 12 13 15 16 {
	replace eduy = `years' if cn90a_edlev1 == `level'
	local ++level
}

tabulate eduy, missing

* edus is a copy of the second education variable
tabulate cn90a_edlev2, missing
clonevar edus = cn90a_edlev2

* Employment
tabulate cn90a_ind, missing
tabulate cn90a_occ, missing
tabulate cn90a_unempst, missing

* emp = 1 when a non-zero industry or occupation code is recorded;
* emp = 0 when a non-zero cn90a_unempst code is recorded (applied last, so it wins).
* Stata evaluates "x != 0" as true when x is missing, so every test also requires a
* non-missing code; without that guard a missing industry/occupation would count as
* employed and a missing cn90a_unempst would force emp to 0.
generate emp = 1 if (cn90a_ind != 0 & !missing(cn90a_ind)) | (cn90a_occ != 0 & !missing(cn90a_occ))
replace emp = 0 if cn90a_unempst != 0 & !missing(cn90a_unempst)

tabulate emp, missing

* working copies of the raw codes
clonevar industry = cn90a_ind
clonevar occupation = cn90a_occ
clonevar unempstatus = cn90a_unempst

* Agricultural employment
* Build a two-digit industry group from the leading characters of the industry code.
tostring industry, generate(industry_str)

* code 0 carries no industry; two-character codes get a leading zero
replace industry_str = "" if industry_str == "0"
replace industry_str = "0" + industry_str if length(industry_str) == 2

* diagnostic: how many codes have 1, 2 or 3 characters after padding
* NOTE(review): one-character codes are not padded, so they pass through as their own group -- confirm none exist
forvalues nchar = 1/3 {
	count if length(industry_str) == `nchar'
}

* first two characters, converted back to a number
generate industry2d = substr(industry_str, 1, 2)
destring industry2d, replace
drop industry_str

* empAgri is 1 for industry groups 1 through 5, 0 for other groups,
* and missing when the group is missing
generate empAgri = (industry2d >= 1 & industry2d <= 5) if !missing(industry2d)

tabulate empAgri, missing


* Birth history
tabulate cn90a_chbornm, missing nolabel

* For each of the four counts (boys/girls ever born, boys/girls surviving):
*   1. code 99 in the raw variable is overwritten with 0 (the raw variable itself is edited),
*   2. the raw variable is copied to a working variable,
*   3. the copy is set to missing for men, and for women with a known age
*      below 15 or of 65 and above.
foreach stub in chbornm chbornf chsurvm chsurvf {
	replace cn90a_`stub' = 0 if cn90a_`stub' == 99
	clonevar `stub' = cn90a_`stub'
	replace `stub' = . if male == 1
	replace `stub' = . if male == 0 & !missing(age) & !(age >= 15 & age < 65)
}

* totals over both sexes
generate chborn = chbornm + chbornf
generate chsurv = chsurvm + chsurvf

* deceased children = ever born minus surviving
generate chdecem = chbornm - chsurvm
generate chdecef = chbornf - chsurvf
generate chdece = chborn - chsurv

summarize chdece chdecem chdecef // more male death <- more male birth

* share of boys among children ever born, surviving and deceased
* (Stata returns missing when the denominator is 0)
generate chborn_maleratio = chbornm / chborn
generate chsurv_maleratio = chsurvm / chsurv
generate chdece_maleratio = chdecem / chdece

summarize chborn_maleratio chsurv_maleratio chdece_maleratio

* Recent birth: copy of cn90a_chb89_90 with code 99 turned into missing
tabulate cn90a_chb89_90
clonevar chbornLY = cn90a_chb89_90
replace chbornLY = . if cn90a_chb89_90 == 99



* Variable labels for the constructed variables
label variable age "Age"
label variable birthy "Birth year"
label variable birthm "Birth month"
* fixed: the original line had no space between the variable name and the
* opening quote, and a stray leading space inside the label text
label variable male "Male"
label variable han "Han"
label variable hukou "Hukou"
label variable edu "Education level"
label variable eduy "Educational years"
label variable edus "Educational status"
label variable empAgri "Agricultural employment"

* birth-history variables
label variable chbornm "boys ever born"
label variable chbornf "girls ever born"
label variable chsurvm "boys survived"
label variable chsurvf "girls survived"
label variable chborn "# births"
label variable chsurv "# survival children"
label variable chborn_maleratio "boys/(boys+girls) ever born"
label variable chsurv_maleratio "boys/(boys+girls) survived"

** Save the individual-level data
* Drop every variable stored from cntry through cn90a_age in the current variable order.
* hhcode and prov were moved to the front by "order hhcode prov" earlier in this file.
* NOTE(review): this range depends on the column order of the raw extract -- confirm
* that cn90a_age is the last raw variable, otherwise some raw columns are kept.
drop cntry-cn90a_age

* shrink storage types of the remaining variables before writing the file
compress

save ../data/census1990individual, replace

	
	
